## packages: remove or add your necessary packages

# required_packages <- c("tidyverse", "readxl", "ggthemes", "hrbrthemes", "extrafont", "plotly", "scales", "stringr", "gganimate", "here", "tidytext", "sentimentr", "scales", "DT", "here", "sm", "mblm", "glue", "fs", "knitr", "rmdformats", "janitor", "urltools", "colorspace", "pdftools", "showtext")

required_packages <- c("tidyverse", "here", "colorspace", "pdftools", "kableExtra", "ggrepel")

for(i in required_packages) { 
  if(!require(i, character.only = T)) {
    
    #  if package is not existing, install then load the package
    install.packages(i, dependencies = T)
    require(i, character.only = T)
  }
}



## save plots?
save <- FALSE
#save <- FALSE

## quality of png's
dpi <- 750


## theme updates; please adjust to client´s website

base_size = 10

my_theme <- function(){
  theme_minimal() +
    theme(
      plot.margin = margin(30, 30, 30, 30),
      plot.background = element_rect(color = "white",
                                     fill = "white"),
      plot.title = element_text(size = 1.4 * base_size),
      plot.title.position = "plot",
      plot.subtitle = element_text(size = base_size),
      plot.caption = element_text(color = "grey40",
                                  size = 0.9 * base_size),
      plot.caption.position = "plot",
      axis.line.x = element_line(color = "black",
                                 size = .8),
      axis.line.y = element_line(color = "black",
                                 size = .8),
      axis.title.x = element_text(size =  base_size,
                                  face = "bold"),
      axis.title.y = element_text(size = base_size,
                                  face = "bold"),
      axis.text = element_text(size = 0.9 * base_size,
                               color = "black",
                               face = "bold"),
      # axis.ticks = element_blank(),
      axis.ticks = element_line(color = "black"),
      panel.grid.major.x = element_line(size = .4,
                                        color = "#eaeaea",
                                        linetype = "solid"),
      panel.grid.major.y = element_line(size = .4,
                                        color = "#eaeaea",
                                        linetype = "solid"),
      panel.grid.minor.x = element_line(size = .4,
                                        color = "#eaeaea",
                                        linetype = "solid"),
      panel.grid.minor.y = element_blank(),
      panel.spacing.x = unit(2, "lines"),
      panel.spacing.y = unit(1, "lines")
    )
}


## numeric format for labels
num_format <- scales::format_format(big.mark = ",", small.mark = ",", scientific = F)

## main colors rankings.io
r_col <- "#D21D5C"

Load data

results <- rio::import(here::here("proc_data", "proc_data.rds")) %>%
  mutate(
    City = forcats::as_factor(City),
    date = lubridate::ymd(date)
  )

Data exploration

glimpse(results)
## Rows: 1,888
## Columns: 5
## $ Sample_ID          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ City               <fct> San Antonio, San Antonio, San Antonio, San Antonio,…
## $ Initial_Ahref_rank <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, …
## $ date               <date> 2021-03-05, 2021-03-06, 2021-03-07, 2021-03-08, 20…
## $ rank               <int> 7, 11, 9, 10, 10, 12, 9, 7, 7, 6, 12, 15, 15, 14, 1…
results %>%
  ggplot() +
  geom_line(aes(x=date, y=rank, group=Sample_ID)) +
  # geom_smooth(aes(x=date, y=rank)) +
  geom_line(data = results %>% group_by(date) %>% summarize(avg = mean(rank, na.rm = TRUE)),
            aes(x=date, y=avg),
            color = r_col,
            size = 2) +
  my_theme() +
  scale_y_reverse(breaks = c(1,5,10,15,20,25,30)) +
  scale_x_date(
    breaks = c(min(results$date), "2021-03-12", "2021-03-12", "2021-03-19",
               "2021-03-26", "2021-04-02", max(results$date)),
    date_labels = "%d %b"
    # date_breaks = "1 week",
    # date_minor_breaks = "1 day"
  ) +
  labs(
    title = "Ranking Over Time" ,
    subtitle = "Ranking of all samples over time and their average in pink."
  )

if(save == T){ 
  ggsave(here::here("plots", "all_over_time.pdf"), name_plot, 
         width = 12.5, height = 8, device = cairo_pdf)
}
  • Chaotic start, bump in the middle after 2 weeks, stabilization
  • No large, clear, and major effect
  • Need to average position for a few days to get stable estimate because variability ad missing values at the start.

Key Figures

Average Ranking Improvement

# average rank of first five days:
avg_start_1w <- results %>% filter(date < "2021-03-12") %>% 
  # first the first week average by sample:
  group_by(Sample_ID) %>% 
  summarize(avg = mean(rank, na.rm = TRUE)) %>% 
  # then the average of the samples:
  ungroup() %>%
  summarize(avg = mean(avg)) %>%
  as.numeric()

# average rank of last five days:
avg_end_1w <- results %>% filter(date > "2021-03-29") %>% 
  # first the first week average by sample:
  group_by(Sample_ID) %>% 
  summarize(avg = mean(rank, na.rm = TRUE)) %>% 
  # then the average of the samples:
  ungroup() %>%
  summarize(avg = mean(avg)) %>%
  as.numeric()

rank_gain = avg_start_1w - avg_end_1w

On average, the law firms gained 2 ranks over the month (average of first week - average of last week).

all_avg_start_1w <- results %>% 
  filter(date < "2021-03-12") %>%
  group_by(Sample_ID) %>% 
  summarise(`First Week` = mean(rank, na.rm = TRUE))
all_avg_end_1w <- results %>% 
  filter(date > "2021-03-29") %>%
  group_by(Sample_ID) %>% 
  summarise(`Last Week` = mean(rank, na.rm = TRUE))

all_avg_wide = all_avg_start_1w %>% 
  left_join(all_avg_end_1w, by = "Sample_ID") %>%
  mutate(
    start_page = case_when(
      `First Week` <= 10 ~ 1,
      `First Week` > 10 &`First Week` <= 20 ~ 2,
      `First Week` > 20 ~ 3
    )
  )
all_avg_long <- all_avg_wide %>%
  pivot_longer(cols = c(2:3), names_to = "Week", values_to = "Average Rank")



# a slopegraph
ggplot(data = all_avg_long) +
  geom_line(aes(x = Week, y = `Average Rank`, group = Sample_ID)) +
  my_theme() +
  scale_y_reverse(breaks = c(1,5,10,15,20,25,30)) +
  labs(
    title = "Average Rank in First and Last Week",
    subtitle = "Pink: overall average\nGreen: average of those who started on first page\nOrange: average of those who started on second page\nRed: average of those who started on third (and fourth) page"
  ) +
  # average:
  geom_line(
    data = all_avg_long %>% group_by(Week) %>% summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, group = 1),
    color = r_col,
    size = 1
  ) + geom_label_repel(
    data = all_avg_long %>% group_by(Week) %>% summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, label = as.character(round(avg,1))), 
    size = 3, color = r_col
  ) + 
  # average of those started on first page:
  geom_line(
    data = all_avg_long %>% filter(start_page == 1) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, group = 1),
    color = "green",
    size = 1
  ) + geom_label_repel(
    data = all_avg_long %>% filter(start_page == 1) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, label = as.character(round(avg,1))), 
    size = 3, color = "green"
  ) + 
  # average of those started on 2nd page:
  geom_line(
    data = all_avg_long %>% filter(start_page == 2) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, group = 1),
    color = "orange",
    size = 1
  ) + geom_label_repel(
    data = all_avg_long %>% filter(start_page == 2) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, label = as.character(round(avg,1))), 
    size = 3, color = "orange"
  ) + 
  # average of those started on 3rd (and 4th) page:
  geom_line(
    data = all_avg_long %>% filter(start_page == 3) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, group = 1),
    color = "red",
    size = 1
  ) + geom_label_repel(
    data = all_avg_long %>% filter(start_page == 3) %>% group_by(Week) %>% 
      summarise(avg = mean(`Average Rank`)),
    aes(x = Week, y = avg, label = as.character(round(avg,1))), 
    size = 3, color = "red"
  )

  • Different fate for the law firms. In particular two are plunging.
  • No apparent difference in behavior in function of their start rank. Apparently a bit easier to improve if starting from lower rank.

A paired t-test comparing the average ranking of each law firm during the first week and last week shows that the difference is statistically significant. We reject the null hypothesis that the difference in mean between the first week and last week in zero in favor of the alternative hypothesis: true difference in means is not equal to 0.

t.test(all_avg_wide$`First Week`,
       all_avg_wide$`Last Week`,
       paired=TRUE)
## 
##  Paired t-test
## 
## data:  all_avg_wide$`First Week` and all_avg_wide$`Last Week`
## t = 4.7894, df = 58, p-value = 1.198e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.166379 2.841450
## sample estimates:
## mean of the differences 
##                2.003914

Conclusions

  • Improvement on average of 2 ranks in 1 month. Statistically significant improvement.
  • Rather small improvement in ranking.